library(brms)
## Loading required package: rstan
## Loading required package: ggplot2
## Loading required package: StanHeaders
## rstan (Version 2.12.1, packaged: 2016-09-11 13:07:50 UTC, GitRev: 85f7a56811da)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## rstan_options(auto_write = TRUE)
## options(mc.cores = parallel::detectCores())
## Loading 'brms' package (version 1.1.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms').
library(rethinking)
## Loading required package: parallel
## rethinking (Version 1.58)
##
## Attaching package: 'rethinking'
## The following objects are masked from 'package:brms':
##
## LOO, WAIC, stancode
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
The prior of normal(0,1) will provide more shrinkage
Instead of
a_group ~ Normal(0,10)
use
a_group ~ normal(a,sigma) a ~ (0,10) sigma ~ cacuhy(0,1)
alpha only
data(reedfrogs)
d <- reedfrogs
d$tank <- 1:nrow(d)
m12m1.tank <- map2stan(
alist(
surv ~ dbinom( density , p ) ,
logit(p) <- a_tank[tank] ,
a_tank[tank] ~ dnorm( a , sigma ) ,
a ~ dnorm(0,1) ,
sigma ~ dcauchy(0,1)
), data=d , iter=4000 , chains=4 )
## Warning in FUN(X[[i]], ...): data with name pred is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name size is not numeric and not
## used
## The following numerical problems occured the indicated number of times after warmup on chain 3
## count
## Exception thrown at line 17: normal_log: Scale parameter is 0, but must be > 0! 2
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## Warning in FUN(X[[i]], ...): data with name pred is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name size is not numeric and not
## used
## Computing WAIC
## Constructing posterior predictions
## Aggregated binomial counts detected. Splitting to 0/1 outcome for WAIC calculation.
plot(m12m1.tank,ask=FALSE)
## Waiting to draw page 2 of 4
## Waiting to draw page 3 of 4
## Waiting to draw page 4 of 4
precis(m12m1.tank)
## 48 vector or matrix parameters omitted in display. Use depth=2 to show them.
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a 1.30 0.25 0.88 1.68 8000 1
## sigma 1.62 0.21 1.29 1.95 4718 1
with predation
d$pred2 <- ifelse(d$pred=="pred",1,0)
m12m1.tank.pred <- map2stan(
alist(
surv ~ dbinom( density , p ) ,
logit(p) <- a_tank[tank] + b_pred*pred2 ,
a_tank[tank] ~ dnorm( a , sigma ) ,
a ~ dnorm(0,1) ,
sigma ~ dcauchy(0,1),
b_pred ~ dnorm(0,5)
), data=d , iter=4000 , chains=4 )
## Warning in FUN(X[[i]], ...): data with name pred is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name size is not numeric and not
## used
## The following numerical problems occured the indicated number of times after warmup on chain 1
## count
## Exception thrown at line 20: normal_log: Scale parameter is 0, but must be > 0! 1
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## The following numerical problems occured the indicated number of times after warmup on chain 3
## count
## Exception thrown at line 20: normal_log: Scale parameter is 0, but must be > 0! 1
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## The following numerical problems occured the indicated number of times after warmup on chain 4
## count
## Exception thrown at line 20: normal_log: Scale parameter is 0, but must be > 0! 2
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## Warning in FUN(X[[i]], ...): data with name pred is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name size is not numeric and not
## used
## Computing WAIC
## Constructing posterior predictions
## Aggregated binomial counts detected. Splitting to 0/1 outcome for WAIC calculation.
plot(m12m1.tank.pred,ask=FALSE)
## Waiting to draw page 2 of 4
## Waiting to draw page 3 of 4
## Waiting to draw page 4 of 4
precis(m12m1.tank.pred)
## 48 vector or matrix parameters omitted in display. Use depth=2 to show them.
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a 2.56 0.23 2.18 2.91 1379 1
## sigma 0.83 0.14 0.62 1.07 2784 1
## b_pred -2.52 0.30 -2.99 -2.04 987 1
with size
d$big <- ifelse(d$size=="big",1,0)
m12m1.tank.size <- map2stan(
alist(
surv ~ dbinom( density , p ) ,
logit(p) <- a_tank[tank] + b_big*big ,
a_tank[tank] ~ dnorm( a , sigma ) ,
a ~ dnorm(0,1) ,
sigma ~ dcauchy(0,1),
b_big ~ dnorm(0,5)
), data=d , iter=4000 , chains=4 )
## Warning in FUN(X[[i]], ...): data with name pred is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name size is not numeric and not
## used
## The following numerical problems occured the indicated number of times after warmup on chain 3
## count
## Exception thrown at line 20: normal_log: Scale parameter is 0, but must be > 0! 1
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## The following numerical problems occured the indicated number of times after warmup on chain 4
## count
## Exception thrown at line 20: normal_log: Scale parameter is 0, but must be > 0! 1
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## Warning in FUN(X[[i]], ...): data with name pred is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name size is not numeric and not
## used
## Computing WAIC
## Constructing posterior predictions
## Aggregated binomial counts detected. Splitting to 0/1 outcome for WAIC calculation.
plot(m12m1.tank.size,ask=FALSE)
## Waiting to draw page 2 of 4
## Waiting to draw page 3 of 4
## Waiting to draw page 4 of 4
precis(m12m1.tank.size)
## 48 vector or matrix parameters omitted in display. Use depth=2 to show them.
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a 1.42 0.35 0.86 1.98 1368 1.00
## sigma 1.63 0.22 1.28 1.96 5315 1.00
## b_big -0.25 0.50 -1.08 0.51 879 1.01
additive, with pred and size
m12m1.tank.pred.size <- map2stan(
alist(
surv ~ dbinom( density , p ) ,
logit(p) <- a_tank[tank] + b_big*big + b_pred*pred2,
a_tank[tank] ~ dnorm( a , sigma ) ,
a ~ dnorm(0,1) ,
sigma ~ dcauchy(0,1),
c(b_big,b_pred) ~ dnorm(0,5)
), data=d , iter=4000 , chains=4 )
## Warning in FUN(X[[i]], ...): data with name pred is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name size is not numeric and not
## used
## The following numerical problems occured the indicated number of times after warmup on chain 2
## count
## Exception thrown at line 23: normal_log: Scale parameter is 0, but must be > 0! 1
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## The following numerical problems occured the indicated number of times after warmup on chain 4
## count
## Exception thrown at line 23: normal_log: Scale parameter is 0, but must be > 0! 1
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## Warning in FUN(X[[i]], ...): data with name pred is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name size is not numeric and not
## used
## Computing WAIC
## Constructing posterior predictions
## Aggregated binomial counts detected. Splitting to 0/1 outcome for WAIC calculation.
plot(m12m1.tank.pred.size,ask=FALSE)
## Waiting to draw page 2 of 4
## Waiting to draw page 3 of 4
## Waiting to draw page 4 of 4
precis(m12m1.tank.pred.size)
## 48 vector or matrix parameters omitted in display. Use depth=2 to show them.
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a 2.73 0.27 2.32 3.17 1007 1
## sigma 0.78 0.15 0.55 1.00 2667 1
## b_big -0.38 0.29 -0.86 0.08 1755 1
## b_pred -2.51 0.30 -2.99 -2.03 1198 1
interaction, with pred and size
m12m1.tank.pred.size.int <- map2stan(
alist(
surv ~ dbinom( density , p ) ,
logit(p) <- a_tank[tank] + b_big*big + b_pred*pred2 + b_big_pred*big*pred2,
a_tank[tank] ~ dnorm( a , sigma ) ,
a ~ dnorm(0,1) ,
sigma ~ dcauchy(0,1),
c(b_big,b_pred,b_big_pred) ~ dnorm(0,5)
), data=d , iter=4000 , chains=4 )
## Warning in FUN(X[[i]], ...): data with name pred is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name size is not numeric and not
## used
## The following numerical problems occured the indicated number of times after warmup on chain 2
## count
## Exception thrown at line 25: normal_log: Scale parameter is 0, but must be > 0! 1
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## The following numerical problems occured the indicated number of times after warmup on chain 3
## count
## Exception thrown at line 25: normal_log: Scale parameter is 0, but must be > 0! 2
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## Warning in FUN(X[[i]], ...): data with name pred is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name size is not numeric and not
## used
## Computing WAIC
## Constructing posterior predictions
## Aggregated binomial counts detected. Splitting to 0/1 outcome for WAIC calculation.
plot(m12m1.tank.pred.size.int,ask=FALSE)
## Waiting to draw page 2 of 4
## Waiting to draw page 3 of 4
## Waiting to draw page 4 of 4
precis(m12m1.tank.pred.size.int)
## 48 vector or matrix parameters omitted in display. Use depth=2 to show them.
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a 2.36 0.30 1.88 2.84 661 1
## sigma 0.75 0.14 0.52 0.97 1922 1
## b_big 0.42 0.45 -0.30 1.14 1025 1
## b_pred -1.88 0.39 -2.52 -1.27 886 1
## b_big_pred -1.35 0.58 -2.25 -0.40 1416 1
par(mfrow=c(1,1))
Focus on the inferred variation across tanks. Explain why it changes as it does across models
At first pass we can just look at the sigma parameter from each model as this is the estimate of adaptive estimate of standard deviation from tank to tank.
precis(m12m1.tank)
## 48 vector or matrix parameters omitted in display. Use depth=2 to show them.
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a 1.30 0.25 0.88 1.68 8000 1
## sigma 1.62 0.21 1.29 1.95 4718 1
precis(m12m1.tank.pred)
## 48 vector or matrix parameters omitted in display. Use depth=2 to show them.
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a 2.56 0.23 2.18 2.91 1379 1
## sigma 0.83 0.14 0.62 1.07 2784 1
## b_pred -2.52 0.30 -2.99 -2.04 987 1
precis(m12m1.tank.size)
## 48 vector or matrix parameters omitted in display. Use depth=2 to show them.
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a 1.42 0.35 0.86 1.98 1368 1.00
## sigma 1.63 0.22 1.28 1.96 5315 1.00
## b_big -0.25 0.50 -1.08 0.51 879 1.01
precis(m12m1.tank.pred.size)
## 48 vector or matrix parameters omitted in display. Use depth=2 to show them.
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a 2.73 0.27 2.32 3.17 1007 1
## sigma 0.78 0.15 0.55 1.00 2667 1
## b_big -0.38 0.29 -0.86 0.08 1755 1
## b_pred -2.51 0.30 -2.99 -2.03 1198 1
precis(m12m1.tank.pred.size.int)
## 48 vector or matrix parameters omitted in display. Use depth=2 to show them.
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a 2.36 0.30 1.88 2.84 661 1
## sigma 0.75 0.14 0.52 0.97 1922 1
## b_big 0.42 0.45 -0.30 1.14 1025 1
## b_pred -1.88 0.39 -2.52 -1.27 886 1
## b_big_pred -1.35 0.58 -2.25 -0.40 1416 1
Basically we see that having predation in the model reduces variance among tanks. This is because predation is a strong predicor of survival, so including it in the model reduces the otherwise unexplained tank to tank variance.
Compare the models you fit just above, using WAIC. Can you reconcile the differences in WAIC with the posterior distributions of the models?
compare(m12m1.tank,m12m1.tank.pred,m12m1.tank.size,m12m1.tank.pred.size,m12m1.tank.pred.size.int)
## WAIC pWAIC dWAIC weight SE dSE
## m12m1.tank.pred 1000.1 28.7 0.0 0.38 37.39 NA
## m12m1.tank.pred.size 1000.3 27.8 0.2 0.35 37.34 1.64
## m12m1.tank.pred.size.int 1000.9 27.9 0.8 0.26 37.65 3.00
## m12m1.tank 1008.8 37.4 8.6 0.01 37.95 6.51
## m12m1.tank.size 1010.1 38.0 9.9 0.00 38.13 6.62
Models that include pred have a smaller number of effective parameters and a lower WAIC. This makes sense w.r.t. the posterior distributions; tanks
m12m1.tank.pred.size.int.b <-
brm(surv | trials(density) ~ 0 + (1| tank) + pred*size,
data=d,
family=binomial(link = "logit"),
prior=c(set_prior("cauchy(0,1)", class = "sd"),
set_prior("normal(0,5)", class = "b")))
## Compiling the C++ model
plot(m12m1.tank.pred.size.int.b)
m12m1.tank.pred.size.int.b
## Family: binomial (logit)
## Formula: surv | trials(density) ~ 0 + (1 | tank) + pred * size
## Data: d (Number of observations: 48)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
## WAIC: Not computed
##
## Group-Level Effects:
## ~tank (Number of levels: 48)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.74 0.14 0.49 1.06 1823 1
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## predno 2.77 0.34 2.11 3.44 2161 1
## predpred -0.44 0.25 -0.92 0.05 2022 1
## sizesmall -0.15 0.47 -1.06 0.75 1835 1
## predpred:sizesmall 1.06 0.58 -0.09 2.24 1464 1
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
precis(m12m1.tank.pred.size.int)
## 48 vector or matrix parameters omitted in display. Use depth=2 to show them.
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a 2.36 0.30 1.88 2.84 661 1
## sigma 0.75 0.14 0.52 0.97 1922 1
## b_big 0.42 0.45 -0.30 1.14 1025 1
## b_pred -1.88 0.39 -2.52 -1.27 886 1
## b_big_pred -1.35 0.58 -2.25 -0.40 1416 1
Refit reed frog data but use Cauchy prior for the varying intercepts. Compare to Gaussian prior. Explain.
First, with Gausian
data(reedfrogs)
d <- reedfrogs
str(d)
# make the tank cluster variable
d$tank <- 1:nrow(d)
d$tank2 <- as.character(d$tank)
m12.2 <- map2stan(
alist(
surv ~ dbinom( density , p ) ,
logit(p) <- a_tank[tank] ,
a_tank[tank] ~ dnorm( a , sigma ) ,
a ~ dnorm(0,1) ,
sigma ~ dcauchy(0,1)
), data=d , iter=4000 , chains=4 )
## Warning in FUN(X[[i]], ...): data with name pred is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name size is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name tank2 is not numeric and not
## used
## The following numerical problems occured the indicated number of times after warmup on chain 1
## count
## Exception thrown at line 17: normal_log: Scale parameter is 0, but must be > 0! 1
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## The following numerical problems occured the indicated number of times after warmup on chain 4
## count
## Exception thrown at line 17: normal_log: Scale parameter is 0, but must be > 0! 1
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## Warning in FUN(X[[i]], ...): data with name pred is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name size is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name tank2 is not numeric and not
## used
## Computing WAIC
## Constructing posterior predictions
## Aggregated binomial counts detected. Splitting to 0/1 outcome for WAIC calculation.
plot(m12.2,ask=FALSE)
## Waiting to draw page 2 of 4
## Waiting to draw page 3 of 4
## Waiting to draw page 4 of 4
precis(m12.2)
## 48 vector or matrix parameters omitted in display. Use depth=2 to show them.
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a 1.30 0.25 0.89 1.69 8000 1
## sigma 1.62 0.21 1.29 1.95 5418 1
Now with Cauchy prior for a intercepts
m12.2.cauchy <- map2stan(
alist(
surv ~ dbinom( density , p ) ,
logit(p) <- a_tank[tank] ,
a_tank[tank] ~ dcauchy( a , sigma ) ,
a ~ dnorm(0,1) ,
sigma ~ dcauchy(0,1)
), data=d , iter=4000 , chains=4 )
## Warning in FUN(X[[i]], ...): data with name pred is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name size is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name tank2 is not numeric and not
## used
## The following numerical problems occured the indicated number of times after warmup on chain 1
## count
## Exception thrown at line 17: cauchy_log: Scale parameter is 0, but must be > 0! 1
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## The following numerical problems occured the indicated number of times after warmup on chain 4
## count
## Exception thrown at line 17: cauchy_log: Scale parameter is 0, but must be > 0! 1
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## Warning in FUN(X[[i]], ...): data with name pred is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name size is not numeric and not
## used
## Warning in FUN(X[[i]], ...): data with name tank2 is not numeric and not
## used
## Computing WAIC
## Constructing posterior predictions
## Aggregated binomial counts detected. Splitting to 0/1 outcome for WAIC calculation.
plot(m12.2.cauchy,ask=FALSE)
## Waiting to draw page 2 of 4
## Waiting to draw page 3 of 4
## Waiting to draw page 4 of 4
precis(m12.2.cauchy,depth=2)
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a_tank[1] 2.01 0.87 0.76 3.30 3689 1.00
## a_tank[2] 6.89 13.45 0.62 11.03 445 1.01
## a_tank[3] 1.09 0.60 0.10 2.01 6200 1.00
## a_tank[4] 7.14 13.74 0.34 12.72 297 1.01
## a_tank[5] 1.99 0.82 0.66 3.16 5328 1.00
## a_tank[6] 2.01 0.84 0.67 3.16 4410 1.00
## a_tank[7] 19.08 84.60 0.63 17.77 67 1.06
## a_tank[8] 2.02 0.88 0.69 3.26 4003 1.00
## a_tank[9] -0.07 0.65 -1.12 0.94 6902 1.00
## a_tank[10] 2.01 0.84 0.74 3.21 4993 1.00
## a_tank[11] 1.11 0.62 0.08 2.05 6419 1.00
## a_tank[12] 0.73 0.63 -0.23 1.76 6601 1.00
## a_tank[13] 1.09 0.62 0.19 2.14 7054 1.00
## a_tank[14] 0.35 0.65 -0.64 1.41 6686 1.00
## a_tank[15] 2.01 0.84 0.76 3.28 4424 1.00
## a_tank[16] 2.03 0.85 0.68 3.20 4489 1.00
## a_tank[17] 2.85 0.93 1.46 4.16 3671 1.00
## a_tank[18] 2.26 0.64 1.24 3.20 5099 1.00
## a_tank[19] 1.92 0.55 1.02 2.72 6390 1.00
## a_tank[20] 13.18 44.90 1.51 17.94 431 1.01
## a_tank[21] 2.27 0.67 1.22 3.24 5002 1.00
## a_tank[22] 2.30 0.68 1.26 3.32 4327 1.00
## a_tank[23] 2.28 0.67 1.29 3.32 4612 1.00
## a_tank[24] 1.66 0.48 0.87 2.36 5743 1.00
## a_tank[25] -1.05 0.47 -1.77 -0.29 6288 1.00
## a_tank[26] 0.24 0.41 -0.42 0.86 6689 1.00
## a_tank[27] -1.59 0.57 -2.46 -0.69 5581 1.00
## a_tank[28] -0.45 0.42 -1.16 0.20 6712 1.00
## a_tank[29] 0.24 0.40 -0.41 0.85 7198 1.00
## a_tank[30] 1.44 0.45 0.68 2.11 6348 1.00
## a_tank[31] -0.63 0.42 -1.28 0.06 7875 1.00
## a_tank[32] -0.28 0.42 -0.96 0.38 6667 1.00
## a_tank[33] 3.25 0.96 1.81 4.63 4269 1.00
## a_tank[34] 2.59 0.67 1.51 3.56 5612 1.00
## a_tank[35] 2.62 0.69 1.55 3.63 4282 1.00
## a_tank[36] 1.97 0.48 1.20 2.72 5869 1.00
## a_tank[37] 1.98 0.49 1.16 2.68 6045 1.00
## a_tank[38] 18.21 69.32 1.83 20.57 125 1.03
## a_tank[39] 2.60 0.67 1.56 3.57 4966 1.00
## a_tank[40] 2.24 0.57 1.35 3.09 5088 1.00
## a_tank[41] -2.01 0.55 -2.90 -1.18 5950 1.00
## a_tank[42] -0.57 0.36 -1.15 0.01 7017 1.00
## a_tank[43] -0.44 0.35 -1.02 0.10 7175 1.00
## a_tank[44] -0.32 0.35 -0.87 0.25 6700 1.00
## a_tank[45] 0.65 0.35 0.09 1.20 7161 1.00
## a_tank[46] -0.56 0.37 -1.16 0.00 7851 1.00
## a_tank[47] 1.97 0.49 1.20 2.73 6910 1.00
## a_tank[48] 0.04 0.34 -0.50 0.59 5739 1.00
## a 1.42 0.30 0.96 1.89 3687 1.00
## sigma 1.02 0.23 0.65 1.38 3421 1.00
Get posterior estimates of a_tank intercepts
library(reshape2)
post.gauss <- extract.samples(m12.2)
post.cauchy <- extract.samples(m12.2.cauchy)
d$est.gauss <- logistic(apply(post.gauss$a_tank,2,mean))
d$est.cauchy <- logistic(apply(post.cauchy$a_tank,2,mean) )
head(d)
## density pred size surv propsurv tank tank2 est.gauss est.cauchy
## 1 10 no big 9 0.9 1 1 0.8917184 0.8822184
## 2 10 no big 10 1.0 2 2 0.9545020 0.9989807
## 3 10 no big 7 0.7 3 3 0.7313764 0.7484382
## 4 10 no big 10 1.0 4 4 0.9547966 0.9992059
## 5 10 no small 9 0.9 5 5 0.8935928 0.8802017
## 6 10 no small 9 0.9 6 6 0.8930508 0.8814373
plot it
library(ggplot2)
d.melt <- melt(d,measure.vars = c("propsurv","est.gauss","est.cauchy"))
head(d.melt)
## density pred size surv tank tank2 variable value
## 1 10 no big 9 1 1 propsurv 0.9
## 2 10 no big 10 2 2 propsurv 1.0
## 3 10 no big 7 3 3 propsurv 0.7
## 4 10 no big 10 4 4 propsurv 1.0
## 5 10 no small 9 5 5 propsurv 0.9
## 6 10 no small 9 6 6 propsurv 0.9
pl <- ggplot(d.melt,aes(y=value,x=tank,color=variable,shape=variable))
pl <- pl + geom_point(size=2)
pl <- pl + facet_wrap(~ density, scales = "free_x")
pl <- pl + geom_hline(yintercept=logistic(mean(post.gauss$a)),lty=2)
pl
For the most part, cauchy causes more shrinkage. This is because it is a fat-tailed distrubution. It does not shrink the most extreme tanks as much, however, and I do not understand why.
Analyze bangladeshi data to model contraception use by district. Model using separate intercepts for each district and pooled information across districts
get the data
data("bangladesh")
colnames(bangladesh) <- sub(".","_",colnames(bangladesh),fixed=TRUE)
bangladesh$district_id <- coerce_index(bangladesh$district)
#bangladesh$district_id <- as.factor(as.numeric(bangladesh$district))
summary(bangladesh)
## woman district use_contraception living_children
## Min. : 1.0 Min. : 1.00 Min. :0.0000 Min. :1.000
## 1st Qu.: 484.2 1st Qu.:14.00 1st Qu.:0.0000 1st Qu.:1.000
## Median : 967.5 Median :29.00 Median :0.0000 Median :3.000
## Mean : 967.5 Mean :29.35 Mean :0.3925 Mean :2.652
## 3rd Qu.:1450.8 3rd Qu.:45.00 3rd Qu.:1.0000 3rd Qu.:4.000
## Max. :1934.0 Max. :61.00 Max. :1.0000 Max. :4.000
## age_centered urban district_id
## Min. :-13.560000 Min. :0.0000 Min. : 1.00
## 1st Qu.: -7.559900 1st Qu.:0.0000 1st Qu.:13.00
## Median : -1.559900 Median :0.0000 Median :29.00
## Mean : 0.002198 Mean :0.2906 Mean :29.49
## 3rd Qu.: 6.440000 3rd Qu.:1.0000 3rd Qu.:45.00
## Max. : 19.440000 Max. :1.0000 Max. :60.00
head(bangladesh)
## woman district use_contraception living_children age_centered urban
## 1 1 1 0 4 18.4400 1
## 2 2 1 0 1 -5.5599 1
## 3 3 1 0 3 1.4400 1
## 4 4 1 0 4 8.4400 1
## 5 5 1 0 1 -13.5590 1
## 6 6 1 0 1 -11.5600 1
## district_id
## 1 1
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
fixed intercepts model
mb1 <- map2stan(alist(
use_contraception ~ dbinom(1,p),
logit(p) <- a[district_id],
a[district_id] ~ dnorm(0,5)),
data=bangladesh,
chains = 4)
## Computing WAIC
## Constructing posterior predictions
plot(mb1,ask=FALSE)
## Waiting to draw page 2 of 4
## Waiting to draw page 3 of 4
## Waiting to draw page 4 of 4
precis(mb1,depth = 2)
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a[1] -1.07 0.21 -1.37 -0.70 3472 1
## a[2] -2.81 1.16 -4.46 -0.99 2565 1
## a[3] -6.31 2.69 -10.05 -2.40 2868 1
## a[4] -0.65 0.39 -1.27 -0.05 3751 1
## a[5] -0.35 0.43 -1.08 0.28 4000 1
## a[6] 0.53 0.19 0.22 0.83 4000 1
## a[7] -0.58 0.45 -1.24 0.17 4000 1
## a[8] 0.21 0.46 -0.53 0.92 4000 1
## a[9] -0.92 0.46 -1.60 -0.14 4000 1
## a[10] -0.67 0.32 -1.19 -0.17 4000 1
## a[11] -0.50 0.40 -1.15 0.10 4000 1
## a[12] -0.63 0.47 -1.38 0.13 4000 1
## a[13] -0.44 0.56 -1.32 0.46 3907 1
## a[14] -0.47 0.49 -1.23 0.33 4000 1
## a[15] -1.48 0.59 -2.46 -0.57 4000 1
## a[16] -1.08 0.59 -2.06 -0.16 4000 1
## a[17] -2.83 1.10 -4.57 -1.13 3393 1
## a[18] -0.21 0.25 -0.62 0.15 4000 1
## a[19] -0.51 0.59 -1.49 0.41 4000 1
## a[20] -1.54 0.39 -2.13 -0.89 4000 1
## a[21] -1.16 0.33 -1.71 -0.64 3715 1
## a[22] -0.98 0.41 -1.62 -0.33 3682 1
## a[23] 4.31 3.01 -0.23 8.72 3058 1
## a[24] -0.04 0.25 -0.44 0.36 4000 1
## a[25] -0.18 0.36 -0.74 0.39 4000 1
## a[26] -1.40 0.52 -2.24 -0.61 3815 1
## a[27] -0.30 0.57 -1.13 0.68 3967 1
## a[28] 0.66 0.36 0.05 1.19 4000 1
## a[29] 0.00 0.29 -0.50 0.45 4000 1
## a[30] -0.63 0.53 -1.48 0.22 3959 1
## a[31] 0.17 0.58 -0.67 1.14 4000 1
## a[32] -0.98 0.60 -1.85 0.05 4000 1
## a[33] -0.01 0.40 -0.65 0.63 4000 1
## a[34] 0.00 0.37 -0.57 0.59 4000 1
## a[35] -0.14 0.31 -0.66 0.35 4000 1
## a[36] 0.00 0.39 -0.63 0.61 4000 1
## a[37] 0.20 0.63 -0.80 1.19 4000 1
## a[38] 0.14 0.31 -0.34 0.63 3943 1
## a[39] -1.31 0.48 -2.01 -0.50 4000 1
## a[40] -0.71 0.35 -1.24 -0.16 4000 1
## a[41] 0.10 0.22 -0.24 0.46 4000 1
## a[42] -0.14 0.54 -1.02 0.68 3740 1
## a[43] 0.10 0.31 -0.42 0.58 4000 1
## a[44] -5.01 2.94 -9.63 -0.70 2014 1
## a[45] -0.59 0.33 -1.13 -0.08 4000 1
## a[46] -0.10 0.48 -0.91 0.62 4000 1
## a[47] -0.17 0.34 -0.67 0.41 4000 1
## a[48] -0.24 0.26 -0.66 0.17 4000 1
## a[49] -0.35 0.48 -1.11 0.41 4000 1
## a[50] -1.89 1.20 -3.73 -0.12 3098 1
## a[51] 0.33 0.31 -0.16 0.83 3363 1
## a[52] -1.53 0.51 -2.37 -0.76 4000 1
## a[53] -0.19 0.35 -0.72 0.38 4000 1
## a[54] -2.47 1.12 -4.26 -0.86 4000 1
## a[55] -0.91 0.28 -1.34 -0.46 4000 1
## a[56] -1.31 0.44 -2.00 -0.62 4000 1
## a[57] -1.33 0.38 -1.91 -0.72 4000 1
## a[58] -1.01 0.54 -1.86 -0.13 2975 1
## a[59] -0.52 0.35 -1.12 -0.01 4000 1
## a[60] -0.85 0.46 -1.58 -0.12 4000 1
mb2 <- map2stan(alist(
use_contraception ~ dbinom(1,p),
logit(p) <- a_district[district_id],
a_district[district_id] ~ dnorm(a,sigma),
a ~ dnorm(0,5),
sigma ~ dcauchy(0,1)),
data=bangladesh,
chains = 4)
## The following numerical problems occured the indicated number of times after warmup on chain 1
## count
## Exception thrown at line 16: normal_log: Scale parameter is 0, but must be > 0! 1
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## Computing WAIC
## Constructing posterior predictions
plot(mb2,ask=FALSE)
## Waiting to draw page 2 of 5
## Waiting to draw page 3 of 5
## Waiting to draw page 4 of 5
## Waiting to draw page 5 of 5
precis(mb2,depth=2)
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a_district[1] -1.00 0.19 -1.29 -0.68 4000 1
## a_district[2] -1.15 0.44 -1.83 -0.43 4000 1
## a_district[3] -1.57 0.44 -2.25 -0.90 4000 1
## a_district[4] -0.61 0.32 -1.11 -0.11 4000 1
## a_district[5] -0.42 0.34 -0.94 0.13 4000 1
## a_district[6] 0.40 0.18 0.10 0.67 4000 1
## a_district[7] -0.56 0.35 -1.09 0.01 4000 1
## a_district[8] -0.11 0.35 -0.67 0.43 4000 1
## a_district[9] -0.75 0.34 -1.27 -0.20 4000 1
## a_district[10] -0.64 0.26 -1.07 -0.23 4000 1
## a_district[11] -0.50 0.32 -1.01 0.00 4000 1
## a_district[12] -0.60 0.35 -1.14 -0.05 4000 1
## a_district[13] -0.48 0.37 -1.01 0.13 4000 1
## a_district[14] -0.51 0.35 -1.04 0.07 4000 1
## a_district[15] -0.97 0.37 -1.56 -0.38 4000 1
## a_district[16] -0.78 0.39 -1.44 -0.21 4000 1
## a_district[17] -1.21 0.43 -1.85 -0.50 4000 1
## a_district[18] -0.28 0.23 -0.64 0.09 4000 1
## a_district[19] -0.51 0.38 -1.11 0.10 4000 1
## a_district[20] -1.19 0.29 -1.65 -0.72 4000 1
## a_district[21] -0.97 0.28 -1.41 -0.52 4000 1
## a_district[22] -0.81 0.31 -1.28 -0.30 4000 1
## a_district[23] -0.23 0.51 -1.09 0.51 4000 1
## a_district[24] -0.14 0.24 -0.51 0.24 4000 1
## a_district[25] -0.30 0.29 -0.76 0.16 4000 1
## a_district[26] -0.99 0.37 -1.57 -0.41 4000 1
## a_district[27] -0.44 0.38 -1.06 0.16 4000 1
## a_district[28] 0.28 0.30 -0.20 0.74 4000 1
## a_district[29] -0.13 0.26 -0.53 0.30 4000 1
## a_district[30] -0.59 0.37 -1.20 -0.03 4000 1
## a_district[31] -0.23 0.38 -0.82 0.39 4000 1
## a_district[32] -0.73 0.40 -1.40 -0.12 4000 1
## a_district[33] -0.20 0.32 -0.71 0.29 4000 1
## a_district[34] -0.18 0.31 -0.66 0.32 4000 1
## a_district[35] -0.26 0.27 -0.68 0.19 4000 1
## a_district[36] -0.20 0.32 -0.67 0.36 4000 1
## a_district[37] -0.23 0.40 -0.86 0.40 4000 1
## a_district[38] -0.03 0.26 -0.41 0.42 4000 1
## a_district[39] -0.97 0.35 -1.47 -0.38 4000 1
## a_district[40] -0.66 0.28 -1.07 -0.17 4000 1
## a_district[41] 0.00 0.20 -0.29 0.33 4000 1
## a_district[42] -0.35 0.37 -0.96 0.21 4000 1
## a_district[43] -0.07 0.27 -0.51 0.35 4000 1
## a_district[44] -0.88 0.49 -1.66 -0.14 4000 1
## a_district[45] -0.58 0.28 -1.01 -0.14 4000 1
## a_district[46] -0.30 0.35 -0.88 0.24 4000 1
## a_district[47] -0.28 0.28 -0.73 0.20 4000 1
## a_district[48] -0.29 0.22 -0.64 0.06 4000 1
## a_district[49] -0.42 0.35 -0.99 0.12 4000 1
## a_district[50] -0.79 0.47 -1.53 -0.06 4000 1
## a_district[51] 0.10 0.27 -0.33 0.53 4000 1
## a_district[52] -1.08 0.35 -1.59 -0.50 4000 1
## a_district[53] -0.30 0.30 -0.76 0.18 4000 1
## a_district[54] -1.02 0.43 -1.68 -0.31 4000 1
## a_district[55] -0.81 0.24 -1.22 -0.45 4000 1
## a_district[56] -1.01 0.32 -1.50 -0.49 4000 1
## a_district[57] -1.06 0.30 -1.52 -0.58 4000 1
## a_district[58] -0.77 0.37 -1.39 -0.22 4000 1
## a_district[59] -0.52 0.29 -0.98 -0.06 4000 1
## a_district[60] -0.72 0.35 -1.32 -0.20 4000 1
## a -0.54 0.09 -0.68 -0.40 2373 1
## sigma 0.52 0.08 0.40 0.66 1150 1
3 ways of getting predictions
library(reshape2)
pred.df <- data.frame(district_id = unique(bangladesh$district_id))
link.vary <- link(mb2,data=pred.df,n=4000)
## [ 400 / 4000 ]
[ 800 / 4000 ]
[ 1200 / 4000 ]
[ 1600 / 4000 ]
[ 2000 / 4000 ]
[ 2400 / 4000 ]
[ 2800 / 4000 ]
[ 3200 / 4000 ]
[ 3600 / 4000 ]
[ 4000 / 4000 ]
pred.df$est.vary.link <- apply(link.vary,2,mean)
pred.df$est.vary.coef <- logistic(coef(mb2)[1:60])
#these are the posterior means. See help page for map2stan.
post.vary <- extract.samples(mb2)$a_district
pred.df$est.vary.extract.samples <- logistic(apply(post.vary,2,mean))
cor(pred.df[,2:4])
## est.vary.link est.vary.coef
## est.vary.link 1.0000000 0.0688281
## est.vary.coef 0.0688281 1.0000000
## est.vary.extract.samples 0.0688281 1.0000000
## est.vary.extract.samples
## est.vary.link 0.0688281
## est.vary.coef 1.0000000
## est.vary.extract.samples 1.0000000
So coef returns the same numbers as extracting the posterior samples and taking the mean. Link is every so slightly different.
plot.df <- data.frame(
district_id=1:60,
fixed=logistic(coef(mb1)),
varying=logistic(coef(mb2)[1:60]),
observed=tapply(bangladesh$use_contraception,bangladesh$district_id,function(x) sum(x)/length(x)))
plot.df.m <- melt(plot.df,id.var="district_id")
## Warning: attributes are not identical across measure variables; they will
## be dropped
pl <- ggplot(plot.df.m,aes(x=district_id,y=value,color=variable,shape=variable))
pl+geom_point(size=3)+geom_hline(yintercept = logistic(coef(mb2)["a"]))